county_fips = read_csv("./data/all-geocodes-v2016.csv", skip = 4) %>%
  janitor::clean_names() %>%
  rename(county_name = area_name_including_legal_statistical_area_description) %>%
  filter(county_code_fips != "000") %>%
  mutate(state_county_fips = paste0(state_code_fips, county_code_fips)) %>%
  select (state_code_fips, county_code_fips, state_county_fips, county_name)
## Parsed with column specification:
## cols(
##   `Summary Level` = col_character(),
##   `State Code (FIPS)` = col_character(),
##   `County Code (FIPS)` = col_character(),
##   `County Subdivision Code (FIPS)` = col_character(),
##   `Place Code (FIPS)` = col_character(),
##   `Consolidtated City Code (FIPS)` = col_character(),
##   `Area Name (including legal/statistical area description)` = col_character()
## )
state_fips = read_csv("./data/state-geocodes-v2016.csv", skip = 5) %>%
  janitor::clean_names() %>%
  filter(state_fips != "00") %>%
  select(state_fips, name) %>%
  mutate(state_fips = as.numeric(state_fips)) %>%
  rename(state = name)
## Parsed with column specification:
## cols(
##   Region = col_double(),
##   Division = col_double(),
##   `State (FIPS)` = col_character(),
##   Name = col_character()
## )
bee_county = read_csv("./data/bee_county/Bee_Colony_Census_Data_by_County 2.csv") %>% 
  janitor::clean_names() %>% 
  select(year, 
         state, 
         state_ansi, 
         ag_district, 
         ag_district_code, 
         county, 
         county_ansi, 
         value, 
         cv_percent) 
## Parsed with column specification:
## cols(
##   Year = col_double(),
##   Period = col_character(),
##   State = col_character(),
##   `State ANSI` = col_double(),
##   `Ag District` = col_character(),
##   `Ag District Code` = col_double(),
##   County = col_character(),
##   `County ANSI` = col_double(),
##   Value = col_character(),
##   `CV (%)` = col_character()
## )
 bee_county$state_ansi = stringr::str_pad(bee_county$state_ansi,2 , pad = "0")
bee_county$county_ansi = stringr::str_pad(bee_county$county_ansi,3 , pad = "0")

bee_county = bee_county%>% 
  mutate(state_county_fips = paste0(state_ansi, county_ansi),
    value = replace(value, value == "(D)", "NA"),
    cv_percent = replace(cv_percent, cv_percent == "(D)", "NA"),
    colony_count = value)
#create function to import data
file_name <- list.files(path = "./data/bee_state_2/") 

df = read_csv(file = str_c("./data/bee_state_2/", file_name[1])) %>%
  mutate(file = file_name[1])
## Parsed with column specification:
## cols(
##   `2` = col_double(),
##   t = col_character(),
##   `Honey: Released February 28, 2002, by the National Agricultural Statistics Service (NASS), Agricultural Statistics Board, U.S. Department of Agriculture.` = col_character(),
##   X4 = col_character(),
##   X5 = col_character(),
##   X6 = col_character(),
##   X7 = col_character(),
##   X8 = col_character(),
##   X9 = col_character()
## )
  df
## # A tibble: 51 x 10
##      `2` t     `Honey: Released … X4    X5    X6    X7    X8    X9    file 
##    <dbl> <chr> <chr>              <chr> <chr> <chr> <chr> <chr> <chr> <chr>
##  1     2 t     Honey:  Number of… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  2     2 t     and Value by Stat… <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  3     2 h     <NA>               <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  4     2 h     <NA>               Honey Yield <NA>  <NA>  Aver… Value 2002…
##  5     2 h     State              Prod… per   Prod… Stoc… Pric… of    2002…
##  6     2 h     <NA>               Colo… Colo… <NA>  Dec … Poun… Prod… 2002…
##  7     2 h     <NA>               <NA>  <NA>  <NA>  <NA>  <NA>  <NA>  2002…
##  8     2 u     <NA>               1,000 Poun… 1,00… 1,00… Cents 1,00… 2002…
##  9     2 d     AL                 16    78    1248  187   59    736   2002…
## 10     2 d     AZ                 40    59    2360  1322  73    1723  2002…
## # … with 41 more rows
my_read_csv = function(x){
    df = read_csv(x, skip = 9, col_names  = F)
}

my_read_csv = function(x){
    df = read_csv(x, skip = 9, col_names  = F)
}
  
bee_data = 
  tibble(
    file_names = file_name,
    path = str_c("./data/bee_state_2/", file_names)
  ) %>% 
  mutate(data = map(path, my_read_csv))%>% 
  unnest()
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X2 = col_character(),
##   X3 = col_character(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double(),
##   X7 = col_double(),
##   X8 = col_double(),
##   X9 = col_double()
## )
#clean data set
clean_bee_data =
  bee_data %>%
 separate(file_names, into = c("year", "remove"), sep = ".c") %>%
  select(-remove, -X1, -X2, -path) %>%
  rename(state = X3, honey_producing_colonies = X4, yield_per_colony = X5, production = X6, stocks = X7, price_per_pound = X8, production_value = X9) %>%
  drop_na(state) %>%
  mutate(state = recode(state, AL = "Alabama", AR = "Arkansas", AZ = "Arizona", CA = "California", CO = "Colorado", FL = "Florida", GA = "Georgia", HI = "Hawaii", IA = "Iowa", IL = "Illinois", ID = "Idaho", IN = "Indiana", KS = "Kansas", KY = "Kentucky", LA = "Louisiana", ME = "Maine", MD = "Maryland", MI = "Michigan", MN = "Minnesota", MO = "Missouri", MT = "Montana", MS = "Mississippi", NC = "North Carolina", ND = "North Dakota", NE = "Nebraska", NJ = "New Jersey", NM = "New Mexico", NV = "Nevada", NY = "New York", OH = "Ohio", OK = "Oklahoma", OR = "Oregon", PA = "Pennsylvania", SC = "South Carolina", SD = "South Dakota", TN = "Tennessee", TX = "Texas", UT = "Utah", VA = "Virginia", VT = "Vermont", WA = "Washington", WV = "West Virginia", WI = "Wisconsin", WY = "Wyoming"  ))

clean_bee_data %>%
  ggplot(aes(x = year, y = honey_producing_colonies, color = state)) +
  geom_point()

pest_2002 = read_excel("./data/pesticides_csv/EPest.county.estimates.2002.xlsx") %>% 
  janitor::clean_names() %>% 
  mutate(
    state_fips = state_fips_code,
    county_fips = county_fips_code,
    state_county_fips = paste0(state_fips, county_fips),
    epest_low_kg = round(epest_low_kg),
    epest_high_kg = round(epest_high_kg)) %>%
  select(-state_fips_code, -county_fips_code)
    
pest_2002 %>% 
  group_by(year, state_county_fips, compound) %>% 
  ggplot(aes(x = compound, y = epest_high_kg)) + 
  geom_col() 

unique(pull(pest_2002, compound))
state_bee_fips = full_join(clean_bee_data, state_fips, by = "state")

top_pesticides = read_csv("./data/top_pesticides.csv") %>%
    mutate(state_fips = as.numeric(state_fips))
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
state_pest_data = 
  top_pesticides %>%
  group_by(state_fips, year, compound) %>%
  summarize(low = sum(epest_low_kg), high = sum(epest_high_kg)) 
state_bee_yrfips = 
  state_bee_fips %>%
  mutate(yearfips = paste0(year, state_fips))

state_pest_yrfips = 
  state_pest_data %>%
  mutate(yearfips = paste0(year, state_fips))

merged_state_data = full_join(state_bee_yrfips, state_pest_yrfips, by = "yearfips") %>%
  select(year.x, state, state_fips.x, honey_producing_colonies, yield_per_colony, production, compound, low, high) %>%
  drop_na(year.x, state, compound, high)
county_bee_yrfips = 
  bee_county %>%
  mutate(yearfips = paste0(year, state_county_fips))
         
county_pest_yrfips = 
  top_pesticides %>%
  mutate(state_county_fips = as.numeric(state_county_fips) ,
         yearfips = paste0(year, state_county_fips))

merged_county_data = full_join(county_bee_yrfips, county_pest_yrfips, by = "yearfips") %>%
  select(year.x, state, county, county_fips, colony_count, compound, epest_low_kg, epest_high_kg) %>%
  drop_na(year.x, state, county, colony_count, compound, epest_high_kg)
#correlation of state level means across all years

corr_state = read_csv("./data/top_pesticides.csv") %>% 
  group_by(compound, state_fips) %>%
  summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>% 
  pivot_wider(
    names_from = "compound",
    values_from = "mean_pest_high",
  ) %>% 
  select(-state_fips)
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
corr_state %>%
  view()

matrix_state_1 = cor(corr_state, use = "everything", method = c("pearson"))
matrix_state_1
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN FIPRONIL
## CHLOROTHALONIL      1.0000000    0.4802217    0.0571605       NA
## CHLORPYRIFOS        0.4802217    1.0000000    0.5316680       NA
## CLOTHIANIDIN        0.0571605    0.5316680    1.0000000       NA
## FIPRONIL                   NA           NA           NA        1
## IMIDACLOPRID        0.4953138    0.8741880    0.3934778       NA
## THIACLOPRID                NA           NA           NA       NA
##                IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL    0.4953138          NA
## CHLORPYRIFOS      0.8741880          NA
## CLOTHIANIDIN      0.3934778          NA
## FIPRONIL                 NA          NA
## IMIDACLOPRID      1.0000000          NA
## THIACLOPRID              NA           1
matrix_state_2 = cor(corr_state, use = "complete.obs", method = c("pearson"))
matrix_state_2
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN     FIPRONIL
## CHLOROTHALONIL      1.0000000   0.70246745    0.3117681 -0.122623772
## CHLORPYRIFOS        0.7024675   1.00000000    0.5756553  0.001349550
## CLOTHIANIDIN        0.3117681   0.57565529    1.0000000  0.506889870
## FIPRONIL           -0.1226238   0.00134955    0.5068899  1.000000000
## IMIDACLOPRID        0.6541162   0.97497272    0.5498456 -0.003886203
## THIACLOPRID         0.1766534   0.12367034   -0.1679516 -0.056247127
##                IMIDACLOPRID  THIACLOPRID
## CHLOROTHALONIL  0.654116214  0.176653447
## CHLORPYRIFOS    0.974972715  0.123670345
## CLOTHIANIDIN    0.549845616 -0.167951571
## FIPRONIL       -0.003886203 -0.056247127
## IMIDACLOPRID    1.000000000 -0.002073838
## THIACLOPRID    -0.002073838  1.000000000
#correlation of country level mean by year 

corr_country = read_csv("./data/top_pesticides.csv") %>% 
  group_by(compound, year) %>%
  summarise(mean_pest_high = mean(epest_high_kg, na.rm = TRUE)) %>% 
  pivot_wider(
    names_from = "compound",
    values_from = "mean_pest_high",
  ) %>%
  ungroup() %>%
  select(-year)
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   compound = col_character(),
##   year = col_double(),
##   epest_low_kg = col_double(),
##   epest_high_kg = col_double(),
##   state_fips = col_character(),
##   county_fips = col_character(),
##   state_county_fips = col_character()
## )
matrix_country_1 = cor(corr_country, use = "everything", method = c("pearson"))
matrix_country_1
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN    FIPRONIL
## CHLOROTHALONIL     1.00000000  -0.30707012   0.05973758 -0.59954253
## CHLORPYRIFOS      -0.30707012   1.00000000   0.19330789 -0.02526745
## CLOTHIANIDIN       0.05973758   0.19330789   1.00000000 -0.45653473
## FIPRONIL          -0.59954253  -0.02526745  -0.45653473  1.00000000
## IMIDACLOPRID       0.22768878  -0.14992739   0.87269447 -0.58062939
## THIACLOPRID       -0.01268331  -0.28960638   0.31040782 -0.29736183
##                IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL    0.2276888 -0.01268331
## CHLORPYRIFOS     -0.1499274 -0.28960638
## CLOTHIANIDIN      0.8726945  0.31040782
## FIPRONIL         -0.5806294 -0.29736183
## IMIDACLOPRID      1.0000000  0.40945350
## THIACLOPRID       0.4094535  1.00000000
matrix_country_2 = cor(corr_country, use = "complete.obs", method = c("pearson"))
matrix_country_2
##                CHLOROTHALONIL CHLORPYRIFOS CLOTHIANIDIN    FIPRONIL
## CHLOROTHALONIL     1.00000000  -0.30707012   0.05973758 -0.59954253
## CHLORPYRIFOS      -0.30707012   1.00000000   0.19330789 -0.02526745
## CLOTHIANIDIN       0.05973758   0.19330789   1.00000000 -0.45653473
## FIPRONIL          -0.59954253  -0.02526745  -0.45653473  1.00000000
## IMIDACLOPRID       0.22768878  -0.14992739   0.87269447 -0.58062939
## THIACLOPRID       -0.01268331  -0.28960638   0.31040782 -0.29736183
##                IMIDACLOPRID THIACLOPRID
## CHLOROTHALONIL    0.2276888 -0.01268331
## CHLORPYRIFOS     -0.1499274 -0.28960638
## CLOTHIANIDIN      0.8726945  0.31040782
## FIPRONIL         -0.5806294 -0.29736183
## IMIDACLOPRID      1.0000000  0.40945350
## THIACLOPRID       0.4094535  1.00000000
#visualization 

corrplot(matrix_state_2, order = "hclust", type = "full")

corrplot(matrix_country_2, order = "hclust", type = "full")